## Choropleth drawing in Bokeh/Matplotlib with geopandas

Source links:
* https://towardsdatascience.com/lets-make-a-map-using-geopandas-pandas-and-matplotlib-to-make-a-chloropleth-map-dddc31c1983d
* https://towardsdatascience.com/a-complete-guide-to-an-interactive-geographical-map-using-python-f4c5197e23e0
* https://www.naturalearthdata.com/downloads/110m-cultural-vectors/
* https://ourworldindata.org/
* https://github.com/owid/owid-datasets
* [GIS at UCD and on the Web: Find Spatial Data & Other Datasets for Ireland](https://libguides.ucd.ie/gisguide/FindSpatialData)
* [Townlands.ie](https://www.townlands.ie/page/download/)
* [kaggle COVID-19 data processing and visualization](https://www.kaggle.com/imdevskp/covid-19-analysis-visualization-comparisons/notebook)
# Notebook setup. Optional installs — uncomment in a fresh environment:
#!pip install descartes
#!pip install selenium
#!pip install plotly
#!pip install calmap

import numpy as np
import pandas as pd
import geopandas as gpd
import json
import pylab as plt

# visualization
import matplotlib.pyplot as mpl
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
import calmap
import folium

# color palette: confirmed, deaths, recovered, active
cnf, dth, rec, act = '#393e46', '#ff2e63', '#21bf73', '#fe9801'

from bokeh.io import output_file, show, output_notebook, export_png
from bokeh.models import ColumnDataSource, GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.plotting import figure
from bokeh.palettes import brewer
# NOTE(review): `widgetbox` was removed in Bokeh 2.0 and is unused below —
# this import only works on Bokeh < 2.0; confirm the pinned Bokeh version.
from bokeh.layouts import widgetbox, row, column

import panel as pn
import panel.widgets as pnw

pn.extension()
output_notebook()

# (Before moving on to the mapping, let's first try to map the covid19 data
# the same way!)
# Data source: https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset/data
corona_df = pd.read_csv("data/covid_19_data.csv")

# Quick exploration of the raw frame.
corona_df.head()
corona_df.shape
corona_df.ndim
corona_df.dtypes

# Distribution of each of the three case counters.
corona_df["Confirmed"].describe()
corona_df["Deaths"].describe()
corona_df["Recovered"].describe()

# Let's split the dataset into three separate frames, and save each to csv.
#todrop = corona_df[["Deaths", "Recovered"]]

# Split the combined frame into one frame per case counter by dropping the
# other two columns each time (equivalent to drop([...], axis=1), or to
# selecting columns with iloc).
corona_confirmed_df = corona_df.drop(columns=["Deaths", "Recovered"])
corona_confirmed_df.tail(3)

corona_deaths_df = corona_df.drop(columns=["Confirmed", "Recovered"])
corona_deaths_df.tail(3)

corona_recovered_df = corona_df.drop(columns=["Confirmed", "Deaths"])
corona_recovered_df.tail(3)

# Save the three splits to csv for later use; place them into the data folder.
corona_confirmed_df.to_csv('covid_19_data_confirmed_24042020.csv', index=False, header=True)
corona_deaths_df.to_csv('covid_19_data_deaths_24042020.csv', index=False, header=True)
corona_recovered_df.to_csv('covid_19_data_recovered_24042020.csv', index=False, header=True)

# Now, let's read these files back and explore them.
# Reload the three per-counter csv files written above.
# source: https://raw.githubusercontent.com/dnzengou/map-dashboard/master/data/covid_19_data_confirmed_26042020.csv
corona_confirmed_df = pd.read_csv("data/covid_19_data_confirmed_24042020.csv")
corona_confirmed_df.head()

# source: https://raw.githubusercontent.com/dnzengou/map-dashboard/master/data/covid_19_data_deaths_26042020.csv
corona_deaths_df = pd.read_csv("data/covid_19_data_deaths_24042020.csv")
corona_deaths_df.head()

# source: https://raw.githubusercontent.com/dnzengou/map-dashboard/master/data/covid_19_data_recovered_26042020.csv
corona_recovered_df = pd.read_csv("data/covid_19_data_recovered_24042020.csv")
corona_recovered_df.head()

# Some countries are included in the data despite having zero confirmed
# cases, so we remove these next.
# Keep only rows that report at least one confirmed case.
corona_df = corona_df[corona_df.Confirmed != 0]
corona_df.head()

# Same filter for the confirmed-only frame.
corona_confirmed_df = corona_confirmed_df[corona_confirmed_df.Confirmed != 0]
corona_confirmed_df.head()

# Order every frame by country then observation date, with a clean index.
sort_cols = ['Country/Region', 'ObservationDate']
sorted_df = corona_df.sort_values(sort_cols).reset_index(drop=True)
sorted_confirmed_df = corona_confirmed_df.sort_values(sort_cols).reset_index(drop=True)
sorted_deaths_df = corona_deaths_df.sort_values(sort_cols).reset_index(drop=True)
sorted_recovered_df = corona_recovered_df.sort_values(sort_cols).reset_index(drop=True)

# Some countries, such as China, are split into different provinces/states.
# Since we just want the total number of cases per country, we get the sum
# for each country at each date next.
# Collapse province/state rows into one total per (country, date).
group_cols = ['Country/Region', 'ObservationDate']
sum_df = sorted_df.groupby(group_cols, as_index=False).sum()
sum_confirmed_df = sorted_confirmed_df.groupby(group_cols, as_index=False).sum()
sum_deaths_df = sorted_deaths_df.groupby(group_cols, as_index=False).sum()
sum_recovered_df = sorted_recovered_df.groupby(group_cols, as_index=False).sum()

# We are going to plot the log of the number of confirmed cases for each
# country, as a couple of countries (e.g. China and Italy) have far more
# cases than the rest.
# Log-scale the confirmed counts (zero-confirmed rows were filtered out
# earlier, so log10 never sees 0 here).
sum_df['log_Confirmed'] = np.log10(sum_df['Confirmed'])
# This is the same as sum_df_confirmed

# Convert ObservationDate to unix time in seconds, stored as a string column
# 'date_sec' on each frame.
# FIX: use astype('int64') rather than astype(int) — on datetime64 data the
# plain astype(int) form is deprecated/removed in recent pandas, and `int`
# maps to a 32-bit C long on Windows, which overflows nanosecond timestamps.
for _frame in (sum_df, sum_confirmed_df, sum_deaths_df, sum_recovered_df):
    _frame['date_sec'] = (
        pd.to_datetime(_frame['ObservationDate']).astype('int64') // 10**9
    ).astype(str)

# Save these files into csv format for later use; place them into the data
# folder.
sum_confirmed_df.to_csv('sum_confirmed_df_24042020.csv', index=False, header=True)
sum_deaths_df.to_csv('sum_deaths_df_24042020.csv', index=False, header=True)
sum_recovered_df.to_csv('sum_recovered_df_24042020.csv', index=False, header=True)

# Check the data structure.
sum_confirmed_df.head(2)
sum_deaths_df.tail(2)
sum_recovered_df.tail(2)

# We can now select the columns needed for the map and discard the others.
#joined_df = joined_df[['Country/Region', 'date_sec', 'log_Confirmed', 'geometry']]

# Keep only the columns the map needs (same selection as for sum_confirmed_df).
map_columns = ['Country/Region', 'date_sec', 'log_Confirmed']
sum_df = sum_df[map_columns]
sum_df.tail(5)

# Save as a csv file.
sum_df.to_csv('sum_df_24042020.csv', index=False, header=True)
# Bar chart of confirmed cases over time, in the "active" palette colour.
fig_c = px.bar(sum_confirmed_df, x="date_sec", y="Confirmed",
               color_discrete_sequence=[act])
fig_c.show()

# Bar chart of deaths over time, in the "deaths" palette colour.
fig_d = px.bar(sum_deaths_df, x="date_sec", y="Deaths",
               color_discrete_sequence=[dth])
fig_d.show()

# Combine both traces side by side in one figure.
fig = make_subplots(rows=1, cols=2, shared_xaxes=False,
                    horizontal_spacing=0.1,
                    subplot_titles=('Confirmed cases', 'Deaths reported'))
fig.add_trace(fig_c['data'][0], row=1, col=1)
fig.add_trace(fig_d['data'][0], row=1, col=2)
fig.update_layout(height=480)
fig.show()

# Let's now get back to mapping data with matplotlib and then bokeh.
shapefile = 'data/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp'

# Read the Natural Earth shapefile with Geopandas, keeping only the country
# name, ISO-3 code, and geometry, then rename to friendlier column names.
gdf = gpd.read_file(shapefile)[['ADMIN', 'ADM0_A3', 'geometry']]
gdf.columns = ['country', 'country_code', 'geometry']
# Drop row 159 — presumably Antarctica, per the source tutorial; confirm
# against the shapefile before changing this index.
gdf = gdf.drop(gdf.index[159])

# Catalogue of available datasets, indexed by name; each row carries a `url`
# column pointing at a csv.
covid19 = pd.read_csv('data/covid19_24042020.csv').set_index('name')
print(covid19)


def get_dataset(name, key=None, date_sec=None):
    """Load the catalogued dataset *name* and merge it onto the country polygons.

    We filter on date_sec (int) rather than ObservationDate (str).

    Parameters
    ----------
    name : str
        Index into the `covid19` catalogue; its `url` cell is read as csv.
    key : str, optional
        Value column to map. Defaults to the 5th column of the loaded frame
        (the first value column in the csv files prepared above).
    date_sec : int, optional
        If given, keep only rows whose 'date_sec' equals this unix timestamp.

    Returns
    -------
    (geopandas.GeoDataFrame, str)
        The merged frame and the chosen key column name. Countries absent
        from the data get 0 for *key* so the choropleth still renders them.
    """
    url = covid19.loc[name].url
    df = pd.read_csv(url)
    if date_sec is not None:
        df = df[df['date_sec'] == date_sec]
    if key is None:
        key = df.columns[4]
    # Left-merge so every country polygon survives even without data.
    merged = gdf.merge(df, left_on='country', right_on='Country/Region', how='left')
    merged[key] = merged[key].fillna(0)
    return merged, key


#get_dataset('Literate world population')
datasetname = 'Time series covid19 confirmed global'
df, key = get_dataset(datasetname, date_sec=1582848000)

# Static matplotlib choropleth for a single date.
fig, ax = plt.subplots(1, figsize=(14, 8))
df.plot(column=key, cmap='OrRd', linewidth=0.8, ax=ax, edgecolor='black')
#df.plot(column=key, cmap='YlGnBu', linewidth=0.8, ax=ax, edgecolor='grey')
ax.axis('off')
ax.set_title('%s 4/23/2020 (date_sec=1582848000)' % datasetname, fontsize=18)
plt.tight_layout()
plt.savefig('test_map-covid19_25042020_bis.png', dpi=150)


def get_geodatasource(gdf):
    """Convert a GeoDataFrame into a bokeh GeoJSONDataSource."""
    json_data = json.dumps(json.loads(gdf.to_json()))
    return GeoJSONDataSource(geojson=json_data)


def bokeh_plot_map(gdf, column=None, title=''):
    """Plot a bokeh choropleth of *column* over the polygons in *gdf*."""
    geosource = get_geodatasource(gdf)
    #palette = brewer['YlGnBu'][8]
    palette = brewer['OrRd'][8]
    palette = palette[::-1]  # reverse so darker shades mean larger values
    vals = gdf[column]
    # Linearly map the data range of *column* onto the colour sequence.
    color_mapper = LinearColorMapper(palette=palette, low=vals.min(), high=vals.max())
    #color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8, width=500, height=20,
    #                     location=(0,0), orientation='horizontal')
    color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8, width=10,
                         location=(0, 0))

    #tools = 'wheel_zoom,pan,reset'
    tools = 'box_select,box_zoom,lasso_select,pan,reset'
    p = figure(title=title, plot_height=400, plot_width=850,
               toolbar_location='right', tools=tools)
    #p.xgrid.grid_line_color = None
    #p.ygrid.grid_line_color = None
    p.xaxis.visible = False
    p.xgrid.visible = False
    p.yaxis.visible = False
    p.ygrid.visible = False
    # Set autohide to true to only show the toolbar when mouse is over plot.
    p.toolbar.autohide = True

    # Add the country polygons, filled by *column* through the colour mapper.
    p.patches('xs', 'ys', source=geosource, fill_alpha=1, line_width=0.5,
              line_color='black',
              fill_color={'field': column, 'transform': color_mapper})
    p.add_layout(color_bar, 'right')
    return p


p = bokeh_plot_map(df, key, title=(datasetname + ' on 4/23/2020 (date_sec=1582848000)'))

# Display figure inline in Jupyter Notebook.
output_notebook()

export_png(p, filename="plot_covid19_2042020.png")  # relies on selenium package
pn.pane.Bokeh(p)


def map_dash():
    """Map dashboard: a dataset selector and date slider driving a bokeh map."""
    # FIX: the original passed `ax=ax` (a matplotlib axis) to pn.pane.Bokeh,
    # which is not a valid keyword for a Bokeh pane.
    map_pane = pn.pane.Bokeh(width=850)
    data_select = pnw.Select(name='dataset', options=list(covid19.index))
    #date_slider = pnw.IntSlider(start=1582848000,end=1584403200,value=1584316800)
    # FIX: the original default (1587600000 ~ 2020-04-23) lies outside the
    # slider's [start, end] range (2020-03-01 .. 2020-03-17); use an in-range
    # default (1584316800 ~ 2020-03-16).
    date_slider = pnw.IntSlider(start=1583020800, end=1584403200, value=1584316800)

    def update_map(event):
        # Re-fetch and re-render the map for the current widget values.
        gdf, key = get_dataset(name=data_select.value, date_sec=date_slider.value)
        map_pane.object = bokeh_plot_map(gdf, key)
        return

    date_slider.param.watch(update_map, 'value')
    date_slider.param.trigger('value')  # render once at startup
    data_select.param.watch(update_map, 'value')
    app = pn.Column(pn.Row(data_select, date_slider), map_pane)
    return app


app = map_dash()
pn.extension()
app